################################################################################################
# This script takes the cleaned SV and QV data from Qualtrics and combines these. We call this
# the "population data". The script tallies the referenda results using simple majority
# and generates eight histograms (as in the Columbia Elections paper) and eight CDFs for the 
# strength of preferences. These are as follows: 
# 
# - Take bins of size 10 and assign a positive (negative) sign to points attributed to a 
#   proposal the subject favors (is against).  
# - Then construct the histograms for each proposal (x4) and for each condition (x2) with an 
#   absolute count of how many subjects fall on each bin.  
# - All subjects who voted InFavor/Against but gave 0 points were given +/- 1 accordingly.
# - All abstentions are placed at 0, even if they gave them points. But a note has been 
#   included to isolate the number of abstentions (on the side).
# 
# Exports the four histograms and four CDFs to the 
# /California Stage 2 - Experiment Plots/Population Preference Intensity Histograms & CDFs folder. 
# 
# Author: Luis S.
# Last Modified: 7.23.16
################################################################################################

library(dplyr)
library(ggplot2)

# setwd("C:/Users/Luis/Dropbox/Research/SV vs QV California Experiment/Experiment Analysis & Results/")

###################################
######## Read SV & QV Data ########
###################################

# Load the cleaned, preference-recoded SV and QV exports (strings are kept as character vectors).
SV_data <- read.csv(
  "California Stage 2 - SV & QV Data/Clean Data/California_MTurk_SV_Stage_2_clean_prefrecoded.csv",
  header = TRUE, stringsAsFactors = FALSE
)
QV_data <- read.csv(
  "California Stage 2 - SV & QV Data/Clean Data/California_MTurk_QV_Stage_2_clean_prefrecoded.csv",
  header = TRUE, stringsAsFactors = FALSE
)

# Flag abstentions per referendum (1 when the vote is "Abstain", 0 otherwise). The *_AbstainWP
# columns (abstained AND assigned points) are only initialised to 0 at this point.
SV_data <- SV_data %>%
  mutate(
    TeachersPref_Abstain = ifelse(Ref2_TeacherTenure == "Abstain", 1, 0),
    BondsPref_Abstain = ifelse(Ref3_PublicBonds == "Abstain", 1, 0),
    EducationPref_Abstain = ifelse(Ref1_BilingualEduc == "Abstain", 1, 0),
    ImmigrationPref_Abstain = ifelse(Ref4_Immigration == "Abstain", 1, 0),
    TeachersPref_AbstainWP = 0,
    BondsPref_AbstainWP = 0,
    EducationPref_AbstainWP = 0,
    ImmigrationPref_AbstainWP = 0
  )

# Same abstention bookkeeping for the QV sample.
QV_data <- QV_data %>%
  mutate(
    TeachersPref_Abstain = ifelse(Ref2_TeacherTenure == "Abstain", 1, 0),
    BondsPref_Abstain = ifelse(Ref3_PublicBonds == "Abstain", 1, 0),
    EducationPref_Abstain = ifelse(Ref1_BilingualEduc == "Abstain", 1, 0),
    ImmigrationPref_Abstain = ifelse(Ref4_Immigration == "Abstain", 1, 0),
    TeachersPref_AbstainWP = 0,
    BondsPref_AbstainWP = 0,
    EducationPref_AbstainWP = 0,
    ImmigrationPref_AbstainWP = 0
  )


# Create population dataset: keep the subject id plus the vote and signed-points columns of
# each referendum, tag every row with its experimental condition, then stack SV on top of QV.
SV_data_temp <- SV_data %>%
  select(MID,
         Ref1_BilingualEduc, EducationPref,
         Ref2_TeacherTenure, TeachersPref,
         Ref3_PublicBonds, BondsPref,
         Ref4_Immigration, ImmigrationPref) %>%
  mutate(Condition = "SV")

QV_data_temp <- QV_data %>%
  select(MID,
         Ref1_BilingualEduc, EducationPref,
         Ref2_TeacherTenure, TeachersPref,
         Ref3_PublicBonds, BondsPref,
         Ref4_Immigration, ImmigrationPref) %>%
  mutate(Condition = "QV")

Pop_data <- bind_rows(SV_data_temp, QV_data_temp)


# Recode the points to their absolute values and export that version.
Pop_data_allPositive <- Pop_data %>%
  mutate(
    EducationPref = abs(EducationPref),
    TeachersPref = abs(TeachersPref),
    BondsPref = abs(BondsPref),
    ImmigrationPref = abs(ImmigrationPref)
  )

write.csv(
  Pop_data_allPositive,
  "California Stage 2 - SV & QV Data/Clean Data/PopulationData(SV&QV)_SimpleVoting&Recoded_AllPos.csv",
  row.names = FALSE
)



######################################
##### Histogram & CDF Functions  #####
######################################

ProposalHist <- function(Pop_Data, Proposal)
{
  # Plots, prints and saves the signed preference-intensity histogram of one proposal.
  #
  # Args:
  #   Pop_Data: data frame holding the signed points column of the requested proposal
  #             (EducationPref, ImmigrationPref, TeachersPref or BondsPref). Positive points
  #             are counted as votes in favor, negative as votes against, zero as abstentions.
  #   Proposal: one of "BilingualEduc", "Immigration", "TeacherTenure", "PublicBonds".
  #
  # Side effects: prints the plot and writes it as a PNG into the
  #   "Population Preference Intensity Histograms & CDFs" plots folder.
  #
  # NOTE: the body previously referred to `Pop_data` (lower-case d), which silently read the
  # global data set instead of this function's `Pop_Data` argument. It now uses the argument.
  
  #--- Set Graphing Parameters
  
  binWidth <- 10
  plotMax <- 120
  
  bins_width10 <- c("[-100,-90)", "[-90,-80)", "[-80,-70)", "[-70,-60)", "[-60,-50)", "[-50,-40)", 
                    "[-40,-30)", "[-30,-20)", "[-20,-10)", "[-10,0)", "0", "(0,10]", "(10,20]", 
                    "(20,30]", "(30,40]", "(40,50]", "(50,60]", "(60,70]", "(70,80]", "(80,90]", "(90,100]")
  
  # One row per bin, in plotting order; BinColor singles out the abstention bin ("0").
  bins10 <- data.frame(Bins = bins_width10, BinOrder = seq_along(bins_width10), stringsAsFactors = FALSE)
  bins10 <- mutate(bins10, BinColor = ifelse(Bins == "0", 1, 0))
  
  #--- Determines the current proposal
  
  propInfo <- switch(Proposal,
                     BilingualEduc = list(title = "Bilingual Education", pref = "EducationPref"),
                     Immigration   = list(title = "Immigration", pref = "ImmigrationPref"),
                     TeacherTenure = list(title = "Teacher Tenure", pref = "TeachersPref"),
                     PublicBonds   = list(title = "Public Bonds", pref = "BondsPref"),
                     stop("Unknown Proposal: ", Proposal, call. = FALSE))
  propTitle <- propInfo[["title"]]
  
  if (!(propInfo[["pref"]] %in% names(Pop_Data)))
  {
    stop("Pop_Data has no column named ", propInfo[["pref"]], call. = FALSE)
  }
  Pop_Data[["CurrentPref"]] <- Pop_Data[[propInfo[["pref"]]]]
  
  #--- Prepare data
  
  # Bin each side separately so that 0 gets its own bin. Bin labels are converted to character
  # right away so the three pieces can be stacked without factor/character coercion.
  ProposalPos <- filter(Pop_Data, CurrentPref > 0) %>%
    mutate(CurrentPref_Bins = as.character(cut(CurrentPref, right = TRUE, breaks = seq(0, 100, binWidth))))
  ProposalNeg <- filter(Pop_Data, CurrentPref < 0) %>%
    mutate(CurrentPref_Bins = as.character(cut(CurrentPref, right = FALSE, breaks = seq(-100, 0, binWidth))))
  ProposalZero <- filter(Pop_Data, CurrentPref == 0) %>%
    mutate(CurrentPref_Bins = "0")
  
  CurrentProp <- bind_rows(ProposalNeg, ProposalZero, ProposalPos) %>%
    group_by(CurrentPref_Bins) %>%
    summarise(NumSubjects = n()) %>%
    rename(Bins = CurrentPref_Bins)
  
  # The full join adds the bins nobody fell into; give them a count of 0 instead of NA so
  # ggplot does not warn about removed rows.
  CurrentProp <- full_join(CurrentProp, bins10, by = "Bins")
  CurrentProp$NumSubjects[is.na(CurrentProp$NumSubjects)] <- 0L
  CurrentProp$Bins <- factor(CurrentProp$Bins, levels = CurrentProp$Bins[order(CurrentProp$BinOrder)])
  
  #--- Create plot
  
  intensityPlot <- ggplot(data = CurrentProp, aes(x = Bins, y = NumSubjects, fill = BinColor)) +
    geom_bar(stat = "identity") +
    scale_y_continuous(limits = c(0, plotMax), breaks = seq(0, plotMax, by = 10)) +
    xlab("Number of Points Assigned to Proposal") + ylab("Number of Subjects") +
    theme(axis.text.x = element_text(size = 12, vjust = -.05, angle = 90),
          legend.text = element_text(size = 16), title = element_text(size = 14)) +
    ggtitle(paste0("Population Pref. Histogram - ", propTitle)) +
    guides(fill = "none")
  
  #--- Annotations
  
  refPosMean <- round(mean(ProposalPos$CurrentPref), digits = 2)
  refPosTotal <- sum(ProposalPos$CurrentPref)
  refNegMean <- abs(round(mean(ProposalNeg$CurrentPref), digits = 2))
  refNegTotal <- abs(sum(ProposalNeg$CurrentPref))
  
  # Here we determine the winning side based on simple majority (a tie counts as "Against")
  winningSide <- if (nrow(ProposalPos) > nrow(ProposalNeg)) "In Favor" else "Against"
  
  l1 <- paste0("Winning Side: ", winningSide)
  l2 <- paste0("Points In Favor (Total/Mean): ", refPosTotal, "/", refPosMean)
  l3 <- paste0("Points Against (Total/Mean): ", refNegTotal, "/", refNegMean)
  l4 <- paste0("Votes on Abstain: ", nrow(ProposalZero))
  l5 <- paste0("Votes In Favor: ", nrow(ProposalPos))
  l6 <- paste0("Votes Against: ", nrow(ProposalNeg))
  
  intensityPlot <- intensityPlot +
    annotate("text", x = 17, y = c(120, 114, 108, 102, 96, 90), label = c(l1, l2, l3, l4, l5, l6))
  print(intensityPlot)
  
  # Name both arguments explicitly instead of relying on `file` partially matching `filename`.
  ggsave(filename = paste0("California Stage 2 - Experiment Plots/Population Preference Intensity Histograms & CDFs/Population Pref. Hist - ", propTitle, ".png"),
         plot = intensityPlot, height = 4.5, width = 9.5)
}


ProposalCDF <- function(Pop_data, Proposal)
{
  # Plots, prints and saves the empirical CDF of the points assigned to one proposal, with one
  # curve per vote value; rows whose vote is "Abstain" are excluded.
  #
  # Args:
  #   Pop_data: data frame holding the points column (EducationPref, ImmigrationPref,
  #             TeachersPref or BondsPref) and the vote column (Ref1_BilingualEduc,
  #             Ref4_Immigration, Ref2_TeacherTenure or Ref3_PublicBonds) of the proposal.
  #   Proposal: one of "BilingualEduc", "Immigration", "TeacherTenure", "PublicBonds".
  #
  # Side effects: prints the plot and writes it as a PNG into the
  #   "Population Preference Intensity Histograms & CDFs" plots folder.

  #--- Determines the current proposal
  
  propInfo <- switch(Proposal,
                     BilingualEduc = list(title = "Bilingual Education", pref = "EducationPref",
                                          vote = "Ref1_BilingualEduc"),
                     Immigration   = list(title = "Immigration", pref = "ImmigrationPref",
                                          vote = "Ref4_Immigration"),
                     TeacherTenure = list(title = "Teacher Tenure", pref = "TeachersPref",
                                          vote = "Ref2_TeacherTenure"),
                     PublicBonds   = list(title = "Public Bonds", pref = "BondsPref",
                                          vote = "Ref3_PublicBonds"),
                     stop("Unknown Proposal: ", Proposal, call. = FALSE))
  propTitle <- propInfo[["title"]]
  
  # Fail early with a readable message instead of a "not found" error inside filter()/ggplot().
  neededCols <- c(propInfo[["pref"]], propInfo[["vote"]])
  missingCols <- setdiff(neededCols, names(Pop_data))
  if (length(missingCols) > 0)
  {
    stop("Pop_data is missing column(s): ", paste(missingCols, collapse = ", "), call. = FALSE)
  }
  
  Pop_data[["CurrentPref"]] <- Pop_data[[propInfo[["pref"]]]]
  Pop_data[["CurrentProp"]] <- Pop_data[[propInfo[["vote"]]]]
  
  Pop_data <- filter(Pop_data, CurrentProp != "Abstain") #remove abstain
  
  #--- Create plot
  
  # `size` is kept for the line width; newer ggplot2 releases prefer `linewidth` for lines.
  intensityCDF <- ggplot(data = Pop_data, aes(x = CurrentPref, colour = CurrentProp)) + 
    stat_ecdf(size = .75) +
    scale_x_continuous(breaks = c(0, 20, 40, 60, 80, 100)) +
    ylab("Proportion") + xlab("Points Assigned to Proposal") +
    theme_classic() +
    ggtitle(paste0("Population Pref. CDF - ", propTitle)) +
    scale_colour_discrete(guide = guide_legend(title = NULL)) 
  print(intensityCDF)
  
  # Name both arguments explicitly instead of relying on `file` partially matching `filename`.
  ggsave(filename = paste0("California Stage 2 - Experiment Plots/Population Preference Intensity Histograms & CDFs/Population Pref. CDF - ", propTitle, ".png"),
         plot = intensityCDF, height = 4, width = 5)
  
}



######################################
##### Population Hist & CDF (x4) #####
######################################


#--- Bilingual Educ, Teacher Tenure, Immigration, Public Bonds (in that order)

# For every proposal: the histogram uses the signed points, the CDF uses the absolute points.
for (proposal in c("BilingualEduc", "TeacherTenure", "Immigration", "PublicBonds"))
{
  ProposalHist(Pop_data, Proposal = proposal)
  ProposalCDF(Pop_data_allPositive, Proposal = proposal)
}



